import json
import os

from datasets import load_dataset

# Load the "test" split of the TIGER-Lab/MMLU-Pro dataset.
dataset = load_dataset("TIGER-Lab/MMLU-Pro", split="test")

# Rename the column question_id -> idx (skipped when the column is absent).
if "question_id" in dataset.column_names:
    dataset = dataset.rename_column("question_id", "idx")

# Save in JSONL format: one JSON object per line.
output_path = "/mnt/vepfs/audio/share/wangjunyou/JiuZhang3.0/eval/data/MMLU-Pro/test.jsonl"
# Create the destination directory up front; otherwise open() raises
# FileNotFoundError when the directory does not exist yet.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False writes non-ASCII text as-is instead of \uXXXX escapes.
    f.writelines(json.dumps(item, ensure_ascii=False) + "\n" for item in dataset)